/*******************************************************************************************************************
********************************************************************************************************************
********************************************************************************************************************
Do file for Phil Magness and Michael Makovi, "Did Karl Marx Party in 1891? The Effect of SPD's Erfurt Program on Karl Marx's Citations"
Code by Michael Makovi
********************************************************************************************************************
********************************************************************************************************************
********************************************************************************************************************
*******************************************************************************************************************/
* Remove any dataset currently held in memory before anything else runs.
clear
* Run the rest of this do file under version control for Stata 18.0.
version 18.0

/*******************************************************************************************************
* Install and/or load necessary packages
********************************************************************************************************/

/*
* synth by Hainmueller, Abadie, and Diamond
ssc install synth, replace all

* Outdated version of synth:
* net from "https://web.stanford.edu/~jhain/Synth"
* net install synth, all replace force

* synth_runner by Quistorff and Galiani
net install synth_runner, from(https://raw.github.com/bquistorff/synth_runner/master/) replace

* parallel by Vega Yon and Quistorff
net install parallel, from(https://raw.github.com/gvegayon/parallel/master/) replace
mata mata mlib index

* github and rcall by Haghish (github is needed to install rcall)
net install github, from("https://haghish.github.io/github/")
github install haghish/rcall, stable
rcall: install.packages("readstata13", repos="http://cran.uk.r-project.org")
rcall_check 
rcall describe
* rcall setpath "if rcall cannot detect your R installation, then set the path here to R.exe"

* Wilson's harmonic mean p-value, implemented in R.
rcall: install.packages("harmonicmeanp", repos="http://cran.uk.r-project.org")

* estout by Ben Jann, in order to use estadd, to add results to e() matrices.
ssc install estout, replace

* matsave is not used, but it was potentially useful for our purposes, so this is kept as a reminder in case we ever need it
* ssc install matsave, replace all
*/

* Report whether official Stata updates are available. This check needs network
* access, so it is wrapped in "capture noisily": if the check fails (for example,
* on an offline machine) the error is still shown but the do file keeps running.
capture noisily update query

/*******************************************************************************************************
Change the current directory to the master folder containing this whole project,
and create a global variable "path" ($path) containing the path to this directory.

Each one of our synthetic control regression results comprises many separate files, 
so each separate SCM regression has its own folder. Saving the master path in a 
global variable will facilitate changing directories.

This do file is contained within a folder named "do files", that is in turn
contained in the master folder. So move up one directory and then save the 
name of the current directory. By saving the name of the master folder, we
facilitate changing directories later.
*******************************************************************************************************/
clear

* By default, we begin in the directory containing this do file.
* So move up one directory, to the master directory for this paper or project.
cd ..

* Fail early, with a clear message, if we did not land in the master folder.
* The helper do file that is loaded further below must be reachable from here;
* if it is not, the do file was not started from inside the "do files" folder
* (or was re-run in the same session after "cd .." had already been executed).
capture confirm file "do files/save_synth_and_synth_runner_output.do"
if (_rc != 0) {
	display as error "Could not find do files/save_synth_and_synth_runner_output.do in the current directory."
	display as error "Start this do file with the working directory set to the folder that contains it."
	exit 601
}

* Now, c(pwd) contains the name of the current directory. This can be verified by disp c(pwd) or by creturn list
global path = c(pwd)
disp "$path"
disp c(pwd)

* Create the results folders if they do not already exist.
* Forward slashes are used as directory separators so the paths work on every platform.
clear
cd "$path"
capture noisily mkdir "results"
capture noisily mkdir "$path/results/Ngram results"
capture noisily mkdir "$path/results/German newspaper results"

* Begin logging. Close any log left open by an earlier, aborted run first;
* otherwise "log using" stops with a "log file already open" error.
capture log close
log using "$path/results/2 - Synth regressions.log", replace

clear

/*******************************************************************************************************
For quick bug-testing, change the following global from 0 to 1.
When set to 1, the dataset will be restricted to a limited set of authors,
and it will change convergence to "technique(dfp)" regardless of what it had previously been set to.
*******************************************************************************************************/
* Debugging switch: 0 for a normal run, 1 for quick bug-testing.
global quick_bug_testing = 0

/*******************************************************************************************************
Global "convergence" holds the optimization options that are passed to synth and synth_runner.
In quick bug-testing mode, only "technique(dfp)" is used.
*******************************************************************************************************/
if ($quick_bug_testing == 1) {
	global convergence "technique(dfp)"
}
else {
	global convergence "nested allopt technique(dfp) maxiter(30)"
}

/*******************************************************************************************************
For maximum speed, we DO use the Intel Math Kernel Library 
See https://www.stata.com/manuals/m-1lapack.pdf

We set Mata to prefer maximizing speed over minimizing memory
See https://www.stata.com/manuals13/m-3mataset.pdf

We set "lapack_mkl on" and "mata set matafavor speed" 
permanently because the parallel module works by opening
additional instances of Stata, and we need these to have the 
same settings. 

Therefore, we save the settings that existed prior to running this do file, and
restore them at the end. This ensures that the change is not really permanent.
*******************************************************************************************************/
* Remember the values in force before this do file changes them.
global previous_lapack_setting "`c(lapack_mkl)'"
global previous_matafavor_setting "`c(matafavor)'"

* Turn on the Intel MKL LAPACK routines and make Mata favor speed. Both are set
* with the "permanently" option.
set lapack_mkl on, permanently
mata: mata set matafavor speed, permanently

/*******************************************************************************************************
Do some more system setup
*******************************************************************************************************/

* Turn off the "more" message when output is long
* Enable timer that will indicate execution time of all commands
* Set a seed so any results involving randomization will be replicable
set more off
set rmsg on
set seed 8675309

* Set the parallel module to use the number of logical processors minus 1, so that there's still some processing capacity left
* for the operating system and background processes. max() guarantees at least one cluster, including the case in which
* "parallel numprocessors" returns a missing value (max() ignores missing arguments).
parallel numprocessors
local numprocessors_to_use = max(r(numprocessors) - 1, 1)
parallel setclusters `numprocessors_to_use'

* If Stata/MP, use all of the processors available - the number of licensed cores or the number of logical processors,
* whichever is smaller. No helper locals are created here, so there is nothing to clean up afterwards.
if (c(MP)) {
	set processors `=min(c(processors_max), c(processors_mach))'
}

clear

/*******************************************************************************************************
Load additional author data for Kautsky.

For the sake of simplicity and replication, our procedure is as follows:
First, we will load additional author data and create separate DTA files for them.
Second, we will append this additional data to our original data as needed.

This will make it easier to use the original data from the JPE paper, and easier to tell what is original and what is new.

Note that any normalization must be performed *after* additional data are appended, never before. 
Adding additional data after normalization has been performed on the previous data will make the data incommensurable.
*******************************************************************************************************/
clear

* This program makes variables for every author consistent.
* First, we "keep Name Year English German French Spanish".
* Second, authors have their names spelled differently in different languages.
* For example, Aristotle is Aristoteles in German, Aristote in French, and Aristóteles in Spanish.
* These alternate spellings are reflected in the spreadsheet. For example, we have a column named "German – Aristoteles".
* So we "rename German cite_German", "rename French cite_French" and "rename Spanish cite_Spanish" to make all the variables consistent in their naming.
* This works because Stata accepts abbreviated variable names: if you have a variable named "AB", then "rename A X" will rename "AB" as "X" as long as there is no 
* other variable named "AC" to create ambiguity (does "A" refer to "AB" or "AC"?).
capture program drop MakeVariablesConsistent
program define MakeVariablesConsistent 
	* Works on the dataset in memory; takes no arguments.
	* Retain only the identifier columns and the four per-language citation columns.
	keep Name Year English German French Spanish

	* Give every citation column the same "cite_<language>" name.
	foreach language in English German French Spanish {
		rename `language' cite_`language'
	}
	
	* Blank spreadsheet rows arrive as observations with a missing Year; keep only the real ones.
	keep if !missing(Year)

	* Clean Name: collapse runs of internal blanks to one blank, then strip leading and trailing blanks.
	replace Name = strtrim(stritrim(Name))

end

* Program to generate language dummy variables.
* For each language listed below, it creates wrote_<language>, equal to 1 when
* OriginalLanguage is exactly that language name and 0 otherwise.
capture program drop GenerateLanguageDummies 
program define GenerateLanguageDummies 
	foreach language in English German French Greek Latin Spanish Italian {
		generate wrote_`language' = (OriginalLanguage == "`language'")
	}
end

* Forward slashes are used as directory separators so the path works on every platform.
cd "$path/data files"

* Karl Kautsky citations
clear
import excel "Marx1850-2000-Ngram - additional authors.xlsx", sheet("Karl Kautsky") firstrow
generate Name = "Karl Kautsky"
MakeVariablesConsistent
save "KarlKautsky.dta", replace

* Author indicators
clear
import excel "Marx_author_indicators - additional authors.xlsx", sheet("Sheet1") firstrow
* For some reason, there are many observations with missing values of Name because blank rows were treated as observations. So drop these
drop if missing(Name)
* Collapse multiple spaces into single spaces in Name, then remove leading and trailing blanks
replace Name = strtrim(stritrim(Name))
* Remove all spaces whatsoever from OriginalLanguage ("." means "replace every occurrence")
replace OriginalLanguage = subinstr(OriginalLanguage, " ", "", .)
compress
save "Marx_author_indicators - additional authors.dta", replace

* Merge the DTA file of citations with the DTA file of indicators, merging by
* author's name, i.e. the variable "Name."
* assert(match using): stop with an error if any citation row has no indicator row, instead of
*                      silently carrying an author whose indicators are all missing.
* keep(match):         do not add indicator-only authors, which would have no Year and no citations.
* nogenerate:          do not leave a _merge variable behind in the saved file.
clear 
use "KarlKautsky.dta"
merge m:1 Name using "Marx_author_indicators - additional authors.dta", assert(match using) keep(match) nogenerate
GenerateLanguageDummies
drop Notes
save "additional_authors.dta", replace

* Finally, erase the temporary indicators DTA file we had to create.
* (KarlKautsky.dta is left in place.)
capture noisily erase "Marx_author_indicators - additional authors.dta"

clear
cd "$path"

**************************************************************************************************************************************************************
* Write some programs we will call frequently
**************************************************************************************************************************************************************

* Program to normalize citation counts (Ngrams: cite_English cite_German cite_French cite_Spanish) (news: cite_news NewsCite_TotalNorm NewsCite_NetNorm),
* YearofPublication, and YearofTranslationtoEnglish to a maximum of 1 to accelerate convergence of numerical estimation.
* It works on the dataset in memory and takes no arguments.
capture program drop NormalizeVariables
program define NormalizeVariables

	* Normalize citations to a maximum of 1 by dividing by the maximum, but don't min-max normalize, to ensure that relative proportions are maintained.
	* Not every dataset holds every citation variable, so variables that are absent (or not numeric) are skipped explicitly
	* rather than by wrapping every command in "capture", which would also hide genuine errors.
	foreach variable in cite_English cite_German cite_French cite_Spanish cite_news NewsCite_TotalNorm NewsCite_NetNorm {
		capture confirm numeric variable `variable'
		if (_rc != 0) {
			continue
		}
		quietly recast double `variable'
		quietly summarize `variable'
		* Skip variables with no observations or a maximum of zero: dividing by zero would turn every value into missing.
		if (r(N) > 0 & r(max) != 0) {
			quietly replace `variable' = `variable' / r(max)
		}
	}

	* Min-max normalize indicator variables. These two variables are required, so stop with an error if one is absent or not numeric.
	foreach variable in YearofPublication YearofTranslationtoEnglish {
		confirm numeric variable `variable'
		quietly recast double `variable'
		quietly summarize `variable'
		* A variable with no spread cannot be min-max normalized (0/0 would turn every value into missing), so it is left as is.
		if (r(N) > 0 & r(max) > r(min)) {
			quietly replace `variable' = (`variable' - r(min)) / (r(max) - r(min))
		}
		else {
			display as text "note: `variable' has no variation (or no observations) and was not normalized"
		}
	}
	
end

* Program to encode the string Name into the numeric Name_no, build a "number-name" lookup string, and tsset the panel
capture program drop NumberUnits 
program define NumberUnits 
	* Numeric unit identifier, labeled with the original names.
	encode Name, generate(Name_no)

	* "<Name_no>-<Name>" for each observation; punct() supplies the hyphen between the two pieces.
	egen Name_no_Name = concat(Name_no Name), punct("-")

	* Print the number-to-name correspondence.
	tabulate Name_no_Name

	* Declare the panel structure: unit Name_no, time Year.
	tsset Name_no Year
end

* Load two programs from "save_synth_and_synth_runner_output.do": "save_synth_output" and "save_synth_runner_output"
* These two programs will save all output from "synth" and "synth_runner" where the current directory is
* Forward slashes are used as directory separators so the path works on every platform.
do "$path/do files/save_synth_and_synth_runner_output.do"

clear

/*************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
Google Ngrams

Synthetic control in German in 1891 for 4 authors: Karl Marx and the 3 authors of the Erfurt Program: August Bebel, Eduard Bernstein, and Karl Kautsky.
In each synthetic control, we drop the other 3 authors to avoid spillovers.
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
*************************************************************************************************************************************************************/

capture program drop LoadData 
program define LoadData 
	
	* Loads the German Ngram analysis sample into memory: original authors plus the additional authors,
	* restricted to 1879-1932 and to observations with German citations, then normalized, numbered and tsset.
	* Side effects: replaces the data in memory and changes the working directory to $path.
	* Forward slashes are used as directory separators so the paths work on every platform.
	clear
	cd "$path"
	use "$path/data files/all_authors_with_citations_and_indicators.dta"
	append using "$path/data files/additional_authors.dta"
	
	keep Name Year cite_* Political Socialist OriginalLanguage wrote_* Yearof* author_set 
	order Name Year cite_* Political Socialist OriginalLanguage wrote_* Yearof* author_set
	
	* As with the original JPE treatment=1917 paper, Hegel is dropped because the father and son are both named "Hegel." Hegel is only included for a single robustness test.
	* The dataset contains both "Karl Marx" and "Marx", and only one can be kept at a time. "Marx" is measured only in English, whereas we are using German citations, so 
	* we do not need to drop "Marx." But for completeness, we do so anyway.
	drop if inlist(Name, "Hegel", "Marx")
	
	* For the synth graph, we will graph to 1932 so that we can visually compare the treatment effects of 1891 vs 1917. 
	* For synth_runner p-values, the sample is shortened further to end in 1901 inside synth_regression, not here.
	* Observations with a missing Year are dropped as well, because inrange() is false for missing values.
	keep if inrange(Year, 1879, 1932)
	
	keep if !missing(cite_German)
	
	NormalizeVariables
	NumberUnits
	compress 
	
end

capture program drop synth_regression
program define synth_regression
	
	* Runs the 1891-treatment synthetic control on cite_German for the unit stored in global $trunit,
	* using the dataset in memory, and saves all output in the current directory.
	*
	* Two estimations are run:
	* 1. synth, with results through 1932, to visually see the effects as late as our original JPE paper.
	* 2. synth_runner, with the data cut off after 1901, for the p-values. We stop in 1901 for 2 reasons: 
	*		(1) so that our post-treatment period is not longer than our pre-treatment period and (2) to avoid conflating with the effects of the 1905 Russian Revolution.
	
	* Default predictor set: every language indicator (the omitted language is Russian), plus Socialist and Political.
	local Languages "wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish"
	local Socialist "Socialist"
	local Political "Political"
	
	* If the data contain no socialist other than Marx, the Socialist indicator is left out.
	quietly count if (Socialist == 1 & Name != "Karl Marx")
	if (r(N) == 0) {
		local Socialist ""
	}
	
	* If every observation belongs to a socialist, leave out both Socialist and Political. In that case the only
	* languages in the data are English, French, and German, so two indicators are included and the third is omitted.
	quietly count if Socialist == 1
	if (r(N) == _N) {
		local Socialist ""
		local Political ""
		local Languages "wrote_English wrote_German"
	}
	
	* The Anti-Socialist Law starts 22 Oct 1878, fails to renew 25 Jan 1890, but actually expires 30 Sept 1890.
	* So we define the treatment in 1891 and the pre-treatment period as 1879-1890.
	* For practical purposes, 1879-1890 = continuous period of Anti-Socialist Law.
	* Thus, 1891 will be a double-treatment of the repeal of the law plus the promulgation of the Erfurt Program.
	
	* Predictor list shared by synth and synth_runner: the indicators plus four three-year averages of pre-treatment citations.
	local predictors YearofPublication `Languages' `Socialist' `Political' cite_German(1879(1)1881) cite_German(1882(1)1884) cite_German(1885(1)1887) cite_German(1888(1)1890)
	
	synth cite_German `predictors', $convergence ///
		trunit($trunit) trperiod(1891) resultsperiod(1879(1)1932) mspeperiod(1879(1)1890) ///
		fig keep("synth_results") replace
	save_synth_output
	
	* synth_runner does not allow setting a resultsperiod, so instead, we have to drop all observations of Year > 1901. We use preserve...restore.
	preserve 
	drop if Year > 1901
	synth_runner cite_German `predictors', $convergence ///
		trunit($trunit) trperiod(1891) mspeperiod(1879(1)1890) ///
		noredo_tr_error gen_vars keep("synth_runner_results") replace parallel
	save_synth_runner_output
	
	* Cleanup
	drop lead cite_German_synth effect pre_rmspe post_rmspe
	parallel clean	
	restore
		
end
		
/******************************************************************************
Karl Marx, primary treatment (1891)

Because the repeal of the Anti-Socialist Law would have treated socialists too, we have 3 specifications:
1. Drop all socialists other than Marx, so that Marx is compared only to non-treated non-socialists. 
	This means Marx experiences a double-treatment of both the repeal of the law as well as the promulgation of the Erfurt Program.
	Therefore, the estimated treatment effect is an upper-bound.
2. Socialists only - to see if Marx's double treatment (repeal of law plus Erfurt Program) is larger than the socialists' single treatment (of the repeal of the law alone)
3. Full sample - just to see.
*******************************************************************************/

* Forward slashes are used as directory separators throughout so the paths work on every platform.
cd "$path/results/Ngram results"
capture noisily mkdir "Karl Marx - German Ngram citations - treatment 1891"
cd "Karl Marx - German Ngram citations - treatment 1891"

/********************
Drop all socialists
*********************/

clear 
LoadData
* Drop Bebel, Bernstein, and Kautsky, who are also potentially treated (by their authorship of the Erfurt Program).
drop if inlist(Name, "August Bebel", "Eduard Bernstein", "Karl Kautsky")
* Drop all other socialists besides Marx.
keep if Name == "Karl Marx" | Socialist == 0

* Name_no is constant within an author, so its mean for Marx is Marx's unit number.
quietly summarize Name_no if Name == "Karl Marx"
global trunit = r(mean)

cd "$path/results/Ngram results/Karl Marx - German Ngram citations - treatment 1891"
capture noisily mkdir "Drop socialists"
cd "Drop socialists"

synth_regression

/****************
Socialists only
*****************/
clear 
LoadData
* Drop Bebel, Bernstein, and Kautsky, who are also potentially treated (by their authorship of the Erfurt Program).
drop if inlist(Name, "August Bebel", "Eduard Bernstein", "Karl Kautsky")
* Keep only socialists.
keep if Socialist == 1

quietly summarize Name_no if Name == "Karl Marx"
global trunit = r(mean)

cd "$path/results/Ngram results/Karl Marx - German Ngram citations - treatment 1891"
capture noisily mkdir "Socialists only"
cd "Socialists only"

synth_regression

/****************
Full sample
*****************/

clear 
LoadData
* Drop Bebel, Bernstein, and Kautsky, who are also potentially treated (by their authorship of the Erfurt Program).
drop if inlist(Name, "August Bebel", "Eduard Bernstein", "Karl Kautsky")

quietly summarize Name_no if Name == "Karl Marx"
global trunit = r(mean)

cd "$path/results/Ngram results/Karl Marx - German Ngram citations - treatment 1891"
capture noisily mkdir "Full sample (inc. socialists)"
cd "Full sample (inc. socialists)"

synth_regression
	
/*******************************************************************************
Karl Marx in English

In the original JPE paper, we performed an in-time placebo in English in 1889.
But that dataset included Bebel and Bernstein. Although we did not say so in the 
paper explicitly, we justified this to ourselves because Bebel was originally
inspired by Lassalle before he became a Marxist, and Bernstein had significant
differences with other Marxists. Still, to be safe, we should perform another 
in-time placebo test with none of the 3 Erfurt Program authors in the dataset.

Fortunately, the original paper had a robustness test (of the primary test for 1917)
which dropped all socialists, so Bebel and Bernstein were dropped there. So our primary 
result for 1917 is not threatened.

But the original paper's in-time placebo is questionable, since that included
Bebel and Bernstein. So we need to redo an in-time placebo in light of our realization, 
with this paper, that Bebel and Bernstein ought to have been excluded.

In order to avoid a proliferation of tests, we use only the specification in which
socialists besides Marx are dropped, so that Marx experiences a double-treatment
and we estimate an upper-bound.
*******************************************************************************/

clear

* Forward slashes are used as directory separators throughout so the paths work on every platform.
cd "$path"
use "$path/data files/all_authors_with_citations_and_indicators.dta"
	
keep Name Year cite_* Political Socialist OriginalLanguage wrote_* Yearof* author_set 
order Name Year cite_* Political Socialist OriginalLanguage wrote_* Yearof* author_set
	
* Drop "Hegel" and "Marx" as they were dropped in the original JPE paper. Also drop the 3 Erfurt authors.
* Although Kautsky is not in this dataset, we drop him anyway for consistency with the rest of this do-file.	
drop if inlist(Name, "Hegel", "Marx", "August Bebel", "Eduard Bernstein", "Karl Kautsky")
* Drop all other socialists besides Marx.
keep if Name == "Karl Marx" | Socialist == 0
	
* Keep 1879-1932. Observations with a missing Year are dropped too, because inrange() is false for missing values.
keep if inrange(Year, 1879, 1932)
	
keep if !missing(cite_English) & !missing(YearofTranslationtoEnglish)
	
NormalizeVariables
NumberUnits
compress 

* Name_no is constant within an author, so its mean for Marx is Marx's unit number.
quietly summarize Name_no if Name == "Karl Marx"
global trunit = r(mean)

cd "$path/results/Ngram results"
capture noisily mkdir "Karl Marx - English citations - treatment 1891"
cd "Karl Marx - English citations - treatment 1891"

* Predictor list shared by synth and synth_runner. The Socialist indicator is deliberately left out,
* because all socialists other than Marx were dropped above.
local predictors YearofPublication wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish YearofTranslationtoEnglish Political cite_English(1879(1)1881) cite_English(1882(1)1884) cite_English(1885(1)1887) cite_English(1888(1)1890)

synth cite_English `predictors', $convergence ///
	trunit($trunit) trperiod(1891) resultsperiod(1879(1)1932) mspeperiod(1879(1)1890) ///
	fig keep("synth_results") replace
save_synth_output
	
* Once again, for our p-values, synth_runner stops in 1901. As in synth_regression, the shortened sample
* is wrapped in preserve...restore so the data in memory are not permanently truncated.
preserve
drop if Year > 1901
synth_runner cite_English `predictors', $convergence ///
	trunit($trunit) trperiod(1891) mspeperiod(1879(1)1890) ///
	noredo_tr_error gen_vars keep("synth_runner_results") replace parallel
save_synth_runner_output
	
* Cleanup
drop lead cite_English_synth effect pre_rmspe post_rmspe
parallel clean	
restore

* Restore thread count
* parallel setclusters `numprocessors_to_use'

/******************************************************************************
Appendix:
Test Bebel, Bernstein, and Kautsky in 1891 and 1917

For simplicity, we use only non-socialists plus the "treated" unit, and estimate 
a double-treatment of both 1891 Erfurt and repeal of Anti-Socialist Law, yielding
an upper-bound estimate. Then we repeat with the treatment date in 1917 using the
same sample for comparability.
*******************************************************************************/

* Create (if needed) and enter the folder that holds all appendix results.
* Forward slashes, with no trailing separator, are used so the paths work on every platform.
cd "$path/results"
capture mkdir "Appendix results"
cd "$path/results/Appendix results"

/* 
Appendix version of synth_regression (it replaces the program of the same name defined earlier).

Two sets of regressions - one with treatment in 1891 and the other in 1917. Each includes both synth and synth_runner.
Since only non-socialist donors are included, the Socialist indicator variable is not among the predictors.
Several of these authors fail to converge with nested allopt. Rather than trying to change the model specification, 
we just estimate without nested allopt. So same model specification but reduced precision. Thus, the options stored
in $convergence are deliberately not passed to synth or synth_runner here.

The treated unit is read from global $trunit. Output goes into the subfolders "1891 treatment" and "1917 treatment"
of the current directory, and the program finishes with "1917 treatment" as the working directory.
*/
capture program drop synth_regression
program define synth_regression

	* Indicator predictors common to all four estimations.
	local indicators YearofPublication wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish Political
	* Pre-treatment citation averages used with the 1891 treatment.
	local lags_1891 cite_German(1879(1)1881) cite_German(1882(1)1884) cite_German(1885(1)1887) cite_German(1888(1)1890)
	* Pre-treatment citation averages used with the 1917 treatment.
	local lags_1917 cite_German(1879(1)1880) cite_German(1884(1)1886) cite_German(1890(1)1892) cite_German(1896(1)1898) cite_German(1902(1)1904) cite_German(1908(1)1910) cite_German(1914(1)1916)

	******
	* 1891
	******
	
	capture mkdir "1891 treatment"
	cd "1891 treatment"	
	synth cite_German `indicators' `lags_1891', ///
		trunit($trunit) trperiod(1891) resultsperiod(1879(1)1932) mspeperiod(1879(1)1890) ///
		fig keep("synth_results") replace
	save_synth_output
	
	* synth_runner does not allow setting a resultsperiod, so instead, we have to drop all observations of Year > 1901. We use preserve...restore.
	preserve 
	drop if Year > 1901
	synth_runner cite_German `indicators' `lags_1891', ///
		trunit($trunit) trperiod(1891) mspeperiod(1879(1)1890) ///
		noredo_tr_error gen_vars keep("synth_runner_results") replace parallel
	save_synth_runner_output
	
	* Cleanup
	drop lead cite_German_synth effect pre_rmspe post_rmspe
	parallel clean	
	restore
	
	******
	* 1917
	******
	
	cd ..
	capture mkdir "1917 treatment"
	cd "1917 treatment"
	
	synth cite_German `indicators' `lags_1917', ///
		trunit($trunit) trperiod(1917) resultsperiod(1879(1)1932) mspeperiod(1879(1)1916) ///
		fig keep("synth_results") replace
	save_synth_output
	
	* For the 1917 treatment, synth_runner uses the data through 1932.
	preserve 
	drop if Year > 1932
	synth_runner cite_German `indicators' `lags_1917', ///
		trunit($trunit) trperiod(1917) mspeperiod(1879(1)1916) ///
		noredo_tr_error gen_vars keep("synth_runner_results") replace parallel
	save_synth_runner_output
	
	* Cleanup
	drop lead cite_German_synth effect pre_rmspe post_rmspe
	parallel clean	
	restore
		
end
				
/****************
August Bebel 
****************/

clear 
LoadData

cd "$path\results\Appendix results\"
capture mkdir "August Bebel"
cd "$path\results\Appendix results\August Bebel"

* Drop Marx, Bernstein, and Kautsky, who are also potentially treated.
drop if Name == "Karl Marx" | Name == "Eduard Bernstein" | Name == "Karl Kautsky"

* Keep only non-socialists, to estimate an upper-bound double-treatment of both Erfurt and the repeal of the Anti-Socialist Law 
keep if Name == "August Bebel" | Socialist == 0

quietly summarize Name_no if Name == "August Bebel"
global trunit = r(mean)

synth_regression

/****************
Eduard Bernstein
****************/

* Start from a freshly loaded dataset.
clear
LoadData

* Create (if it does not exist yet) and enter the results folder for this author.
cd "$path\results\Appendix results\"
capture mkdir "Eduard Bernstein"
cd "$path\results\Appendix results\Eduard Bernstein"

* Build the estimation sample in one step:
*   - exclude Marx, Bebel, and Kautsky, who are also potentially treated;
*   - of the remaining units keep only Bernstein himself plus the non-socialists,
*     to estimate an upper-bound double-treatment of both Erfurt and the
*     repeal of the Anti-Socialist Law.
keep if !inlist(Name, "Karl Marx", "August Bebel", "Karl Kautsky") ///
	& (Socialist == 0 | Name == "Eduard Bernstein")

* Store Bernstein's unit number in the global that the synth commands read as the treated unit.
quietly summarize Name_no if Name == "Eduard Bernstein"
global trunit = r(mean)

synth_regression

/****************
Karl Kautsky
****************/

* Start from a freshly loaded dataset.
clear
LoadData

* Create (if it does not exist yet) and enter the results folder for this author.
cd "$path\results\Appendix results\"
capture mkdir "Karl Kautsky"
cd "$path\results\Appendix results\Karl Kautsky"

* Build the estimation sample in one step:
*   - exclude Marx, Bebel, and Bernstein, who are also potentially treated;
*   - of the remaining units keep only Kautsky himself plus the non-socialists,
*     to estimate an upper-bound double-treatment of both Erfurt and the
*     repeal of the Anti-Socialist Law.
keep if !inlist(Name, "Karl Marx", "August Bebel", "Eduard Bernstein") ///
	& (Socialist == 0 | Name == "Karl Kautsky")

* Store Kautsky's unit number in the global that the synth commands read as the treated unit.
quietly summarize Name_no if Name == "Karl Kautsky"
global trunit = r(mean)

synth_regression
	
/*************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
German newspapers 

If we decide to use any 1950s data, then subtract Neuer Vorwarts, &zdb_id=2143617-4
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
*************************************************************************************************************************************************************/	

/*******************************************************************************
Write some programs we will call frequently
*******************************************************************************/

* LoadData: load the German newspaper dataset and reduce it to the baseline estimation sample.
*   - Takes no arguments; replaces whatever data are currently in memory.
*   - Reads the global $path to locate the data file.
*   - Calls NormalizeVariables and NumberUnits (both defined elsewhere in this file).
*   - Resulting sample: years 1879-1932, Karl Marx plus all non-socialists.
capture program drop LoadData
program define LoadData 

	clear
	* "clear" is the documented option of -use- for discarding the data in memory.
	use "$path\data files\german_newspaper_data_w_indicators.dta", clear
	NormalizeVariables
	* An error in NumberUnits is displayed but does not stop execution.
	capture noisily NumberUnits
	
	* We begin in 1879 to be consistent with our foregoing tests of 1891 using German Google Ngrams.
	* Similarly, we start in 1879 for tests of both 1891 and 1917 for the sake of consistency.
	keep if Year >= 1879 & Year <= 1932
	
	* Drop "Marx" (as opposed to "Karl Marx"), Hegel, Bebel, Bernstein, and Kautsky - even though some of them shouldn't be in this dataset to begin with.
	drop if Name == "Marx" | Name == "Hegel" | Name == "August Bebel" | Name == "Eduard Bernstein" | Name == "Karl Kautsky"
	
	* In 1891, we drop all other socialists to avoid spillover effects due to the abrogation of the Anti-Socialist Law.
	* So do this in 1917 (or any other year) too, for direct comparability of the two effects of 1891 and 1917;
	* the two tests won't be using two different samples of donors.
	keep if Name == "Karl Marx" | Socialist == 0
	
end

* synth_regression: synthetic-control test of the 1917 Russian Revolution on NewsCite_NetNorm,
* with Karl Marx as the treated unit.
* It is run twice below: (1) on the non-socialist sample and (2) on the full sample.
* The caller is expected to have loaded the data and changed into the desired results folder.
capture program drop synth_regression 
program define synth_regression

	* The Socialist indicator is only a usable predictor when the data contain at least one
	* socialist other than Marx. The local macro "Socialist" therefore holds either the
	* variable name "Socialist" or the empty string, and is spliced into the predictor list.
	quietly count if (Socialist == 1 & Name != "Karl Marx")
	if (r(N) > 0) {
		local Socialist "Socialist"
	}
	else {
		local Socialist ""
	}
	
	* Treated unit: Karl Marx's unit number, stored in the global read by trunit() below.
	quietly summarize Name_no if Name == "Karl Marx"
	global trunit = r(mean)

	* Predictors shared by the synth and synth_runner calls.
	local covariates YearofPublication wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish `Socialist' Political
	local lagged_outcomes NewsCite_NetNorm(1879(1)1880) NewsCite_NetNorm(1884(1)1886) NewsCite_NetNorm(1890(1)1892) NewsCite_NetNorm(1896(1)1898) NewsCite_NetNorm(1902(1)1904) NewsCite_NetNorm(1908(1)1910) NewsCite_NetNorm(1914(1)1916)

	* Estimate with synth (figure requested), then hand the results to save_synth_output.
	synth NewsCite_NetNorm `covariates' `lagged_outcomes', $convergence ///
		trunit($trunit) trperiod(1917) resultsperiod(1879(1)1932) mspeperiod(1879(1)1916) ///
		fig replace
	
	save_synth_output

	* Same specification through synth_runner, then hand the results to save_synth_runner_output.
	synth_runner NewsCite_NetNorm `covariates' `lagged_outcomes', $convergence ///
		trunit($trunit) trperiod(1917) mspeperiod(1879(1)1916) ///
		noredo_tr_error gen_vars ///
		keep("synth_runner_results") replace ///
		parallel 
		
	save_synth_runner_output

	* Cleanup: remove the variables added by gen_vars and tidy up after parallel.
	drop lead NewsCite_NetNorm_synth effect pre_rmspe post_rmspe
	parallel clean		

end
	
/*******************************************************************************
Test of 1917 Russian Revolution
*******************************************************************************/

* Create (if needed) the top-level folder for the 1917 newspaper results.
cd "$path\results\German newspaper results\"
capture noisily mkdir "1917 Russian Rev"
cd "1917 Russian Rev"

***************
* Non-socialists only (besides Marx himself, of course).
* To be consistent with the sample used to test the 1891 Erfurt Program below
***************

cd "$path\results\German newspaper results\1917 Russian Rev"
capture noisily mkdir "Drop socialists"
cd "Drop socialists"

* The LoadData program will drop socialists; the same program will be used to test 1891.
clear 
LoadData 

synth_regression	
	
***************
* Full sample
***************

cd "$path\results\German newspaper results\1917 Russian Rev"
capture noisily mkdir "Full sample"
cd "Full sample"

* Same code as LoadData program except socialists are not dropped.
* "clear" is the documented option of -use- for discarding the data in memory.
clear 
use "$path\data files\german_newspaper_data_w_indicators.dta", clear
NormalizeVariables
capture noisily NumberUnits
keep if Year >= 1879 & Year <= 1932
drop if Name == "Marx" | Name == "Hegel" | Name == "August Bebel" | Name == "Eduard Bernstein" | Name == "Karl Kautsky"

* synth_regression detects the remaining socialists and adds the Socialist predictor itself.
synth_regression

/*******************************************************************************	
Effect of 1891 Erfurt Program on Karl Marx 
*******************************************************************************/

* Reload the baseline sample (Karl Marx plus non-socialists, 1879-1932).
clear 
LoadData 

* Identify the treated unit explicitly instead of relying on whatever value an
* earlier section happened to leave in the global trunit.
quietly summarize Name_no if Name == "Karl Marx"
global trunit = r(mean)

cd "$path\results\German newspaper results\"
capture noisily mkdir "1891 Erfurt"
cd "1891 Erfurt"
	
* Estimate with synth (figure requested). The Socialist indicator is deliberately
* left out of the predictors because LoadData has dropped all other socialists.
synth NewsCite_NetNorm ///
	YearofPublication wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish /* Socialist */ Political ///
	NewsCite_NetNorm(1879(1)1881) NewsCite_NetNorm(1882(1)1884) NewsCite_NetNorm(1885(1)1887) NewsCite_NetNorm(1888(1)1890), $convergence ///
	trunit($trunit) trperiod(1891) resultsperiod(1879(1)1932) mspeperiod(1879(1)1890) ///
	fig replace	
	
	save_synth_output
	
* The synth_runner step uses only the years through 1901. Wrap the truncation in
* preserve/restore so the shortened sample does not leak into later code.
preserve
drop if Year > 1901	
synth_runner NewsCite_NetNorm ///
	YearofPublication wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish /* Socialist */ Political ///
	NewsCite_NetNorm(1879(1)1881) NewsCite_NetNorm(1882(1)1884) NewsCite_NetNorm(1885(1)1887) NewsCite_NetNorm(1888(1)1890), $convergence ///
	trunit($trunit) trperiod(1891) mspeperiod(1879(1)1890) ///
	noredo_tr_error ///
	gen_vars ///
	keep("synth_runner_results") replace ///
	parallel 	
	
	save_synth_runner_output
	
	* Cleanup
	drop lead NewsCite_NetNorm_synth effect pre_rmspe post_rmspe
	parallel clean		
restore
	
/******************************************************************************
Appendix:
Repeat tests of 1891 and 1917 with German newspapers, but this time, include the
socialist newspapers that we previously excluded.
*******************************************************************************/
	
* Create (if needed) and enter the folder for this appendix exercise.
cd "$path\results\Appendix results\"
capture noisily mkdir "Newspapers - now include socialist papers"
cd "$path\results\Appendix results\Newspapers - now include socialist papers"

* Reload the baseline newspaper sample.
clear 
LoadData 

* Treated unit: Karl Marx's unit number, read by trunit() in the commands below.
quietly summarize Name_no if Name == "Karl Marx"
global trunit = r(mean)

***************
* 1891
***************

cd "$path\results\Appendix results\Newspapers - now include socialist papers"
capture noisily mkdir "1891"
cd "$path\results\Appendix results\Newspapers - now include socialist papers\1891"
	
* Estimate with synth. This appendix uses NewsCite_TotalNorm as the outcome, and the
* Socialist indicator is intentionally not among the predictors.
synth NewsCite_TotalNorm ///
	YearofPublication ///
	wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish ///
	Political ///
	NewsCite_TotalNorm(1879(1)1881) ///
	NewsCite_TotalNorm(1882(1)1884) ///
	NewsCite_TotalNorm(1885(1)1887) ///
	NewsCite_TotalNorm(1888(1)1890), $convergence ///
	trperiod(1891) trunit($trunit) ///
	mspeperiod(1879(1)1890) resultsperiod(1879(1)1932) ///
	fig replace	
	
save_synth_output
	
* The synth_runner step only uses the years through 1901; the full sample is restored afterwards.
preserve
keep if Year <= 1901	
synth_runner NewsCite_TotalNorm ///
	YearofPublication ///
	wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish ///
	Political ///
	NewsCite_TotalNorm(1879(1)1881) ///
	NewsCite_TotalNorm(1882(1)1884) ///
	NewsCite_TotalNorm(1885(1)1887) ///
	NewsCite_TotalNorm(1888(1)1890), $convergence ///
	trperiod(1891) trunit($trunit) ///
	mspeperiod(1879(1)1890) ///
	gen_vars noredo_tr_error ///
	keep("synth_runner_results") replace ///
	parallel 	
	
save_synth_runner_output
	
* Cleanup: remove the variables added by gen_vars and tidy up after parallel.
drop lead NewsCite_TotalNorm_synth effect pre_rmspe post_rmspe
parallel clean	
restore
	
***************
* 1917
***************

* Create (if needed) and enter the folder for the 1917 appendix results.
cd "$path\results\Appendix results\Newspapers - now include socialist papers"
capture noisily mkdir "1917"
cd "$path\results\Appendix results\Newspapers - now include socialist papers\1917"
	
* Estimate with synth on NewsCite_TotalNorm. The Socialist indicator is intentionally
* not among the predictors.
synth NewsCite_TotalNorm ///
	YearofPublication ///
	wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish ///
	Political ///
	NewsCite_TotalNorm(1879(1)1880) ///
	NewsCite_TotalNorm(1884(1)1886) ///
	NewsCite_TotalNorm(1890(1)1892) ///
	NewsCite_TotalNorm(1896(1)1898) ///
	NewsCite_TotalNorm(1902(1)1904) ///
	NewsCite_TotalNorm(1908(1)1910) ///
	NewsCite_TotalNorm(1914(1)1916), $convergence ///
	trperiod(1917) trunit($trunit) ///
	mspeperiod(1879(1)1916) resultsperiod(1879(1)1932) ///
	fig replace
	
save_synth_output

* Same specification through synth_runner.
synth_runner NewsCite_TotalNorm ///
	YearofPublication ///
	wrote_English wrote_German wrote_French wrote_Greek wrote_Latin wrote_Italian wrote_Spanish ///
	Political ///
	NewsCite_TotalNorm(1879(1)1880) ///
	NewsCite_TotalNorm(1884(1)1886) ///
	NewsCite_TotalNorm(1890(1)1892) ///
	NewsCite_TotalNorm(1896(1)1898) ///
	NewsCite_TotalNorm(1902(1)1904) ///
	NewsCite_TotalNorm(1908(1)1910) ///
	NewsCite_TotalNorm(1914(1)1916), $convergence ///
	trperiod(1917) trunit($trunit) ///
	mspeperiod(1879(1)1916) ///
	gen_vars noredo_tr_error ///
	keep("synth_runner_results") replace ///
	parallel 
		
save_synth_runner_output

* Cleanup: remove the variables added by gen_vars and tidy up after parallel.
drop lead NewsCite_TotalNorm_synth effect pre_rmspe post_rmspe
parallel clean			
	
**************************************************************************************************************************************************************
* Clear everything and restore any settings which were permanently changed.
**************************************************************************************************************************************************************	
* Empty the dataset in memory and issue rcall's clear command.
clear 
rcall clear

* Delete leftover .Rdata files; errors (e.g. file not found) are deliberately ignored.
foreach rdata_file in "$path\.Rdata" "$path\do files\.Rdata" {
	capture quietly erase "`rdata_file'"
}

* Final cleanup for the parallel package.
parallel clean, all

* Put back the LAPACK and Mata settings held in the two "previous_..." globals.
set lapack_mkl $previous_lapack_setting, permanently
mata: mata set matafavor $previous_matafavor_setting, permanently

log close

